


***********************************************
****** The Impact of Soft-Skills Training *****
******    for Entrepreneurs in Jamaica    *****
***********************************************


* This file prepares the data coming from the questionnaire for follow-up 1, to use for the analysis


* Start from a clean session: empty memory, no open log, no paging of output
clear all
capture log close
set more off

* Switch to the folder stored in global "directory" (capture: a failing cd does not stop the script)
capture cd "$directory"



************
*** Data ***
************

*** Load follow-up 1 data ***

* Open the cleaned questionnaire data, replacing whatever is in memory
use "$data_int/fu1_data_cleaned", clear


*** Merge with other data ***

* Add the field-visit data, one row per respondent_id; merge_field records where each row comes from
merge 1:1 respondent_id using "$data_int/fu1_field_data", generate(merge_field)
label define merge_field ///
	1 "Questionnaire only" ///
	2 "Field data only" ///
	3 "Questionn. and field data"
label values merge_field merge_field
label variable merge_field "Merge with field data"

* Add the treatment assignment: stop with an error if a row in memory has no assignment,
* keep matched rows and rows found only in the assignment file, and do not store a merge indicator
merge 1:1 respondent_id using "$data_int/treatment_assignment", ///
	assert(match using) keep(match using) nogenerate

* Place the assignment variables right after the respondent identifier
order treatment t1 t2 strata, after(respondent_id)


*** Define labels ***

* Yes/no label for 0/1 dummies (modify: extends or overwrites "yn" if it is already defined)
label define yn 0 "No" 1 "Yes", modify

* Calendar months; both 99 and the extended missing value .a are displayed as "Don't know"
label define months ///
	1  "January"   2  "February"  3  "March"     ///
	4  "April"     5  "May"       6  "June"      ///
	7  "July"      8  "August"    9  "September" ///
	10 "October"   11 "November"  12 "December"  ///
	99 "Don't know" ///
	.a "Don't know"



************************
*** System Variables ***
************************

*** Syst. vars: generate new variables ***

* Interview date: parse VStart with the "YMDhms" mask into a Stata daily date, shown as DD/MM/YYYY
* (VStart is presumably the interview start timestamp stored as a string -- confirm against the raw data)
generate date_interview = date(VStart, "YMDhms")
format date_interview %tdDD/NN/CCYY
label variable date_interview "Date of interview"

* Calendar month (1-12) in which the interview took place
generate month_interview = month(date_interview)
label values month_interview months
label variable month_interview "Month of interview"



****************
*** MODULE B ***
****************

*** Mod. B: Edit existing variables ***

* Self-employment dummy: 1 if B1A equals 1, 0 for any other answer, missing if B1A is missing
generate selfemp = (B1A == 1) if !missing(B1A)
label variable selfemp "Self employed"
label values selfemp yn

* Employees: dummy built from B2; head counts converted to numeric from B2A (full-time) and B2B (part-time)
generate has_employees = (B2 == 1) if !missing(B2)
label variable has_employees "Has employees"
label values has_employees yn

destring B2A, generate(employees_fulltime)
destring B2B, generate(employees_parttime)
label variable employees_fulltime "N. of full-time employees"
label variable employees_parttime "N. of part-time employees"

* A missing head count is set to 0 whenever B2 was answered
replace employees_fulltime = 0 if missing(employees_fulltime) & !missing(has_employees) & merge_field != 2
replace employees_parttime = 0 if missing(employees_parttime) & !missing(B2)

* Total employees = full-time + half of part-time; missing only when both counts are missing
generate pemployees = 0.5 * employees_parttime
egen total_employees = rowtotal(employees_fulltime pemployees), missing
drop pemployees

* Respondents who are not self-employed have no employees
replace total_employees = 0 if selfemp == 0
label variable total_employees "N. of employees"

* Amounts of sales (B3), costs (B4) and profits (B5)
* For each item, question A is the last month and question B is a typical month.
* Answers are converted to numeric and set to 0 for respondents who are not self-employed.
local money_stubs     sales costs profits
local money_questions 3 4 5
forvalues k = 1/3 {
	local stub : word `k' of `money_stubs'
	local q    : word `k' of `money_questions'

	destring B`q'A, generate(`stub'_lastm)
	destring B`q'B, generate(`stub'_typm)

	replace `stub'_lastm = 0 if selfemp == 0
	replace `stub'_typm  = 0 if selfemp == 0
}

label variable sales_lastm   "Sales in the last month"
label variable sales_typm    "Sales in a typical month"
label variable costs_lastm   "Total costs in the last month"
label variable costs_typm    "Total costs in a typical month"
label variable profits_lastm "Profits in the last month"
label variable profits_typm  "Profits in a typical month"

* Investments: dummy from B6 (0 if not self-employed); amount from B6_1 (0 if no investment was made)
generate investment = (B6 == 1) if !missing(B6)
replace investment = 0 if selfemp == 0
label variable investment "Made an investment"
label values investment yn

destring B6_1, generate(investment_amount)
replace investment_amount = 0 if investment == 0
label variable investment_amount "Amount of the investment"

* Innovation: dummy from B7 (0 if not self-employed)
generate innovation = (B7 == 1) if !missing(B7)
replace innovation = 0 if selfemp == 0
label variable innovation "Introduced innovation"
label values innovation yn

* Loans: request dummy from B8_1 (0 if not self-employed); approval dummy from B8_2 (0 if no loan was requested)
generate loan_requested = (B8_1 == 1) if !missing(B8_1)
replace loan_requested = 0 if selfemp == 0
label variable loan_requested "Requested any loan"

generate loan_approved = (B8_2 == 1) if !missing(B8_2)
replace loan_approved = 0 if loan_requested == 0
label variable loan_approved "Loan approved"

label values loan_requested loan_approved yn

* Accounting: B9 equal to 1 is coded as formal accounts, B9 equal to 2 or 3 as informal accounts;
* both dummies are 0 for respondents who are not self-employed
generate formal_accounts   = (B9 == 1) if !missing(B9)
generate informal_accounts = inlist(B9, 2, 3) if !missing(B9)
foreach acc in formal_accounts informal_accounts {
	replace `acc' = 0 if selfemp == 0
}
label variable formal_accounts   "Keeps formal accounts"
label variable informal_accounts "Keeps informal accounts"
label values formal_accounts informal_accounts yn


*** Mod. B: Create new variables ***

* Firm survival: take the questionnaire answer (selfemp) when available,
* otherwise fall back on selfemp_attrit (information on attriters from field visits/calls)
generate has_business = cond(missing(selfemp), selfemp_attrit, selfemp)
label variable has_business "Owns a business"
label values has_business yn

* Dummy equal to 1 when last month's profits are strictly positive (0 if not self-employed)
generate pos_profits_lastm = (profits_lastm > 0) if !missing(profits_lastm)
replace pos_profits_lastm = 0 if selfemp == 0
label variable pos_profits_lastm "Positive profits in the last month"



****************
*** MODULE E ***
****************

*** Mod. E: Edit existing variables ***

* Business practices: copy items I_1_E1 ... I_7_E1 into busprac_1 ... busprac_7,
* turning code 2 into 0 and leaving every other value unchanged
foreach item of numlist 1/7 {
	recode I_`item'_E1 (2 = 0), generate(busprac_`item')
	label variable busprac_`item' "Business practice `item'"
}

* Attach the yes/no label to all business-practice variables
label values busprac_* yn



**********************************
*** Variables for the analysis ***
**********************************

*** Edit variables for respondents without business ***

* Outcomes forced to 0 when the respondent has no business
* (has_business also covers attriters for whom we have information)
local zero_if_no_business ///
	sales_lastm sales_typm profits_lastm profits_typm costs_lastm costs_typm ///
	pos_profits_lastm innovation busprac_* ///
	employees_fulltime employees_parttime has_employees total_employees ///
	investment investment_amount loan_requested loan_approved

foreach outcome of varlist `zero_if_no_business' {
	replace `outcome' = 0 if has_business == 0
}

* Month of interview: use code 99 (DK) when the month is missing but business status is known,
* so that these observations are kept in regressions
replace month_interview = 99 if !missing(has_business) & missing(month_interview)


*** Transform variables ***

* Winsorize from above only: values above the 99th percentile are set to that percentile
* (percentiles are computed among the self-employed; the original variable is left untouched)
foreach v of varlist sales_lastm sales_typm costs_lastm investment_amount {
	local base_label : variable label `v'
	clonevar win_`v' = `v'
	summarize `v' if selfemp == 1, detail
	replace win_`v' = r(p99) if win_`v' > r(p99) & !missing(win_`v')
	label variable win_`v' "Winsorized `= strlower("`base_label'")'"
}

* Winsorize on both sides: values below the 1st percentile and above the 99th percentile
* are set to the respective percentile (again computed among the self-employed)
foreach v of varlist profits_lastm profits_typm {
	local base_label : variable label `v'
	clonevar win_`v' = `v'
	summarize `v' if selfemp == 1, detail
	replace win_`v' = r(p1)  if win_`v' < r(p1)  & !missing(win_`v')
	replace win_`v' = r(p99) if win_`v' > r(p99) & !missing(win_`v')
	label variable win_`v' "Winsorized `= strlower("`base_label'")'"
}

* Transform monetary variables with the inverse hyperbolic sine (IHS)
* IHS(x) = ln(x + sqrt(x^2 + 1)). For large negative x (e.g. big losses in profits) the term
* x + sqrt(x^2 + 1) subtracts two nearly equal numbers, which loses precision and can even
* evaluate to 0, giving a missing value. Because IHS is an odd function, IHS(x) = sign(x) * IHS(|x|):
* this form is identical for x >= 0 and numerically stable for x < 0.
* Missing values of the original variable remain missing.
foreach var of varlist sales_lastm sales_typm profits_lastm profits_typm {
	gen ihs_`var'=sign(`var')*ln(abs(`var')+sqrt(`var'^2+1))
	lab var ihs_`var' "IHS of `= strlower("`: var lab `var''")'"
}


*** Create sales and profits index ***

* Z-scores of each component, using mean and standard deviation of observations with treatment==0
local sp_components sales_lastm win_sales_lastm ihs_sales_lastm profits_lastm win_profits_lastm ihs_profits_lastm
local sp_zscores
foreach comp of varlist `sp_components' {
	summarize `comp' if treatment == 0
	generate z_`comp' = (`comp' - r(mean)) / r(sd)
	label variable z_`comp' "Z-score: `: variable label `comp''"
	local sp_zscores `sp_zscores' z_`comp'
}

* Index = row mean of the component z-scores
egen sales_profits_index = rowmean(`sp_zscores')
label variable sales_profits_index "Sales and profits index"

* The index is set to missing when last month's sales or profits are missing
replace sales_profits_index = . if missing(sales_lastm, profits_lastm)

* Re-standardize the index with the mean and standard deviation of observations with treatment==0
summarize sales_profits_index if treatment == 0
replace sales_profits_index = (sales_profits_index - r(mean)) / r(sd)


*** Create capital and labor index ***

* Z-scores of each component, using mean and standard deviation of observations with treatment==0
local in_components total_employees employees_fulltime employees_parttime investment win_investment_amount
local in_zscores
foreach comp of varlist `in_components' {
	summarize `comp' if treatment == 0
	generate z_`comp' = (`comp' - r(mean)) / r(sd)
	label variable z_`comp' "Z-score: `: variable label `comp''"
	local in_zscores `in_zscores' z_`comp'
}

* Index = row mean of the component z-scores (rowmean averages over the non-missing components)
egen inputs_index = rowmean(`in_zscores')
label variable inputs_index "Capital and labor inputs index"


*** Create business practices index ***

* Index = row mean of the busprac_* variables, i.e. the share of business practices adopted (0-1)
egen business_practices_index = rowmean(busprac_*)
label variable business_practices_index "Business practices index"

* Respondents without a business get an index of 0
replace business_practices_index = 0 if has_business == 0


*** Create personal initiative index ***

* Z-scores of items I_1_E2 ... I_7_E2, using mean and standard deviation of observations with treatment==0
local pi_zscores
forvalues q = 1/7 {
	summarize I_`q'_E2 if treatment == 0
	generate z_I_`q'_E2 = (I_`q'_E2 - r(mean)) / r(sd)
	label variable z_I_`q'_E2 "Z-score: `: variable label I_`q'_E2'"
	local pi_zscores `pi_zscores' z_I_`q'_E2
}

* Index = row mean of the item z-scores
egen personal_initiative_index = rowmean(`pi_zscores')
label variable personal_initiative_index "Personal initiative index"

* Re-standardize the index with the mean and standard deviation of observations with treatment==0
summarize personal_initiative_index if treatment == 0
replace personal_initiative_index = (personal_initiative_index - r(mean)) / r(sd)



**************
*** Saving ***
**************

* Reduce storage types where possible without losing information, then write the final
* follow-up 1 dataset (overwriting any existing file with the same name)
compress
save "$data_fin/fu1_data_final", replace


